R code for lecture 28

# reload data and refit models to wells data
wells <- read.csv("ecol 563/wells.txt")
out1 <- glm(cbind(y, n-y)~ land.use + sewer, data=wells, family=binomial)
wells$land.use2 <- factor(ifelse(wells$land.use %in% c('agri','undev'), 'rural', as.character(wells$land.use)))
wells$land.use3 <- factor(ifelse(wells$land.use2 %in% c('inst','recr','resL','resM','trans'), 'mixed', as.character(wells$land.use2)))
wells$land.use4 <- factor(ifelse(wells$land.use3 %in% c('resH','comm','indus'), 'high.use', as.character(wells$land.use3)))
out2d <- glm(cbind(y,n-y)~land.use4+sewer, data=wells, family=binomial)
out2i <- glm(cbind(y,n-y) ~ land.use4 + sewer + nitrate, data=wells, family=binomial)
 
# predict returns logits
predict(out2d)
# fitted returns probabilities
fitted(out2d)
# predict with type argument returnes probabilities
# normal-based confidence intervals for them are not appropriate here
predict(out2d, type='response', se.fit=T)
 
# create data frame containing the six unique combinations of sewer and land use
new.dat <- expand.grid(land.use4=levels(wells$land.use4), sewer=levels(wells$sewer))
 
# obtain estimated logits and their standard errors
out.p <- predict(out2d, se.fit=T, newdata=new.dat)
out.p
 
# create data frame of logits, standard errors, and categories
new.dat2 <- data.frame(new.dat, logit=out.p$fit, se=out.p$se)
new.dat2
 
# calculate Wald-based confidence intervals for logits
new.dat2$low95 <- new.dat2$logit + qnorm(.025)*new.dat2$se
new.dat2$up95 <- new.dat2$logit + qnorm(.975)*new.dat2$se
new.dat2
 
# inverse logit function
inv.logit <- function(eta) exp(eta)/(1+exp(eta))
 
# invert logits and their confidence intervals
apply(new.dat2[,c(3,5:6)], 2, inv.logit)
 
# add probabilities and their confidence intervals to data frame
p.dat <- data.frame(new.dat,apply(new.dat2[,c(3,5:6)], 2, inv.logit))
p.dat
# relabel logit column
names(p.dat)[3] <- 'prob'
p.dat
 
# obtaining confidence intervals using profile likelihood
confint(out2d)
# return logits for land use categories when sewer= no
out2d1 <- glm(cbind(y,n-y)~land.use4+sewer-1, data=wells, family=binomial)
coef(out2d)
coef(out2d1)
# obtain confidence intervals
out.c1 <- confint(out2d1)
out.c1
 
# define new sewer factor with sewer=yes the reference group
wells$sewer2 <- factor(wells$sewer, levels=rev(levels(wells$sewer)))
levels(wells$sewer)
levels(wells$sewer2)
 
# refit model using new sewer variable
out2d2 <- glm(cbind(y,n-y)~land.use4+sewer2-1, data=wells, family=binomial)
coef(out2d2)
 
out.c2 <- confint(out2d2)
out.c2 
 
# collect results in a matrix
out.profile <- data.frame(est=c(coef(out2d1)[1:3], coef(out2d2)[1:3]), rbind(out.c1[1:3,],out.c2[1:3,]))
# obtain probabilities
p.dat.profile <- data.frame(new.dat, inv.logit(out.profile))
names(p.dat.profile)[3:5] <- c('prob', 'low95', 'up95')
p.dat.profile
 
### graph probabilities and their confidence intervals ####
oldmar <- par("mar")
par(mar=c(4.1,8.1,1.1,1.1))
p.dat$both <- factor(paste(p.dat$sewer, p.dat$land.use4, sep='.'))
p.dat
p.dat$num <- as.numeric(p.dat$both)
p.dat
plot(num~prob, data=p.dat, xlab='Probability of contamination', ylab='', xlim=c(0, 0.55), type='n', axes=F)
axis(1)
axis(2, at=1:6, labels=rep(c('high use', 'mixed use', 'rural'),2), las=2, cex.axis=.9)
box()
par(lend=1)
segments(p.dat$low95, p.dat$num, p.dat$up95, p.dat$num, col='grey70', lwd=5)
points(p.dat$prob, p.dat$num, pch='|', lwd=2, cex=1.5, col=1)
mtext(side=2, at=c(2,5), text=c('Sewers absent','Sewers present'), line=5.5, cex=1.2)
abline(h=3.5, lty=2)
par(mar=oldmar)
 
### graphing probabilities with continuous predictors ####
coef(out2i)
# there are no common values of nitrate for all six categories
tapply(wells$nitrate, list(wells$land.use4,wells$sewer), mean)
tapply(wells$nitrate, list(wells$land.use4,wells$sewer), max)
tapply(wells$nitrate, list(wells$land.use4,wells$sewer), min)
 
# regression model for the logit
logit.mod <- function(mixed,rural,sewer,x) coef(out2i)[1]+ coef(out2i)[2]*mixed+coef(out2i)[3]*rural+coef(out2i)[4]*sewer+ coef(out2i)[5]*x
logit.mod(0,0,0,3)
 
# inverse logit for continuous model
prob.mod <- function(mixed,rural,sewer,x) exp(logit.mod(mixed,rural,sewer,x))/(1+exp(logit.mod(mixed,rural,sewer,x)))
prob.mod(0,0,0,3)
 
# graph over the range of the data
range(wells$nitrate)
out.range <- tapply(wells$nitrate, list(wells$land.use4, wells$sewer), range)
 
plot(c(0.8,7),c(0,0.65), xlab='nitrate', ylab='Probability', type='n')
curve(prob.mod(0,0,0,x), add=T, xlim=out.range[1,]$no , lwd=2)
curve(prob.mod(1,0,0,x), add=T, col=2, xlim=out.range[2,]$no, lwd=2)
curve(prob.mod(0,1,0,x), add=T, col=3, xlim=out.range[3,]$no, lwd=2)
curve(prob.mod(0,0,1,x), add=T, lty=2, xlim=out.range[1,]$yes, lwd=2)
curve(prob.mod(1,0,1,x), add=T, col=2, lty=2, xlim=out.range[2,]$yes, lwd=2)
curve(prob.mod(0,1,1,x), add=T, col=3, lty=2, xlim=out.range[3,]$yes, lwd=2)
legend('topleft', rep(c('high use', 'mixed use', 'rural'),2), col=rep(1:3,2), lty=rep(1:2,each=3), lwd=2, cex=.9, bty='n', ncol=2, title='Sewers absent     Sewers present')
 
# plotting log odds ratios with continuous and categorical predictors
 
wells$land.use4a <- factor(wells$land.use4, levels=rev(levels(wells$land.use4)))
levels(wells$land.use4)
levels(wells$land.use4a)
# refit model so all odds ratios are positive
out2m <- glm(cbind(y,n-y)~land.use4a+sewer+nitrate, data=wells, family=binomial)
out.conf <- confint(out2m)
out.conf
 
# log odds ratios and confidence intervals
out.mod <- data.frame(var=1:4, est=coef(out2m)[2:5], out.conf[2:5,])
out.mod
names(out.mod)[3:4] <- c('low95','up95')
out.mod
# create labels for log odds ratios
mylabs <- c('land use:\nmixed use vs rural','land use:\nhigh use vs rural','sewer:\nyes vs no', 'nitrate:\n1 mg/l increase')
# prepanel function
myprepanel.ci <- function(x,y,lx,ux,subscripts,...){
list(xlim=range(x, ux[subscripts], lx[subscripts], finite=T,...))}
 
library(lattice)
xyplot(factor(var, labels=mylabs)~est, xlab='Log odds ratio', ylab='', lx=out.mod$low95, ux=out.mod$up95, data=out.mod, prepanel=myprepanel.ci, panel=function(x,y){
panel.segments(out.mod$low95, y, out.mod$up95, y, lwd=5, col='grey70', lineend=1)
panel.xyplot(x, y, pch='|', cex=1.5, col=1, lwd=2)
panel.abline(v=0, lty=2, col=2)})
 
# randomized block design with binomial response
 
# read data in from clipboard: Windows
crabs <- read.table('clipboard', header=T)
# read data in from clipboard: Mac OS
crabs <- read.table(pipe("pbpaste"), header=T)
crabs[1:4,]
# fixed effects randomized block design
out1 <- glm(cbind(killed,6-killed)~factor(Treatment)+factor(Block_Number), data=crabs, family=binomial)
summary(out1)
 
# random effects randomized block design
library(lme4)
out1.lmer <- lmer(cbind(killed,6-killed)~factor(Treatment)+(1|Block_Number), data=crabs, family=binomial)
summary(out1.lmer)
R Code for Lecture 28 (Monday, December 3, 2012)